{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using Theano backend.\n",
      "Using gpu device 0: GeForce GT 740M (CNMeM is enabled with initial size: 80.0% of memory, cuDNN not available)\n"
     ]
    }
   ],
   "source": [
    "# -*- coding: utf-8 -*-\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(2016)\n",
    "\n",
    "import os\n",
    "import glob\n",
    "import math\n",
    "import pickle\n",
    "import datetime\n",
    "\n",
    "from keras.layers import Input, Embedding, LSTM, Dense,Flatten, Dropout, merge\n",
    "from keras.models import Model\n",
    "\n",
    "def load_train():\n",
    "    X_train_uid=[]\n",
    "    X_train_iid=[]\n",
    "    Y_train_score=[]\n",
    "\n",
    "    path = os.path.join('../scripts',  'train.txt')\n",
    "    print('Read train data',path)\n",
    "\n",
    "    f = open(path, 'r')\n",
    "    line = f.readline()\n",
    "    while (1):\n",
    "        line = f.readline()\n",
    "        if line == '':\n",
    "            break\n",
    "        arr = line.strip().split(',')\n",
    "        X_train_uid.append(int(arr[0]))\n",
    "        X_train_iid.append(int(arr[1]))\n",
    "        Y_train_score.append(float(arr[2]))\n",
    "    f.close()\n",
    "    return X_train_uid,X_train_iid,Y_train_score\n",
    "\n",
    "def load_test():\n",
    "    X_test_uid=[]\n",
    "    X_test_iid=[]\n",
    "\n",
    "    path = os.path.join('../scripts',  'test.txt')\n",
    "    print('Read test data',path)\n",
    "\n",
    "    f = open(path, 'r')\n",
    "    line = f.readline()\n",
    "    while (1):\n",
    "        line = f.readline()\n",
    "        if line == '':\n",
    "            break\n",
    "        arr = line.strip().split(',')\n",
    "        X_test_uid.append(int(arr[0]))\n",
    "        X_test_iid.append(int(arr[1]))\n",
    "    f.close()\n",
    "    return X_test_uid,X_test_iid\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('Read train data', '../scripts/train.txt')\n",
      "load train data OK.\n",
      "('Read test data', '../scripts/test.txt')\n",
      "load test data OK.\n"
     ]
    }
   ],
   "source": [
    "X_train_uid,X_train_iid,Y_train_score = load_train()\n",
    "#print len(X_train_uid),X_train_uid[33177260],max(X_train_uid)\n",
    "#print len(X_train_iid),X_train_iid[33177260],max(X_train_iid)\n",
    "#print len(Y_train_score),Y_train_score[33177260]\n",
    "print \"load train data OK.\"\n",
    "\n",
    "X_test_uid,X_test_iid = load_test()\n",
    "#print len(X_test_uid),X_test_uid[100],max(X_test_uid)\n",
    "#print len(X_test_iid),X_test_iid[100],max(X_test_iid)\n",
    "print \"load test data OK.\"\n",
    "\n",
    "# normalize train date\n",
    "X_train_uid=np.array(X_train_uid)\n",
    "X_train_uid=X_train_uid.reshape(X_train_uid.shape[0],1)\n",
    "\n",
    "X_train_iid=np.array(X_train_iid)\n",
    "X_train_iid=X_train_iid.reshape(X_train_iid.shape[0],1)\n",
    "\n",
    "Y_train_score = np.array(Y_train_score).astype('float32')\n",
    "Y_train_score = (Y_train_score)/ 10\n",
    "\n",
    "# normalize test date\n",
    "X_test_uid=np.array(X_test_uid)\n",
    "X_test_uid=X_test_uid.reshape(X_test_uid.shape[0],1)\n",
    "\n",
    "X_test_iid=np.array(X_test_iid)\n",
    "X_test_iid=X_test_iid.reshape(X_test_iid.shape[0],1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# define model\n",
    "input_1=Input(shape=(1,), dtype='int32')\n",
    "input_2=Input(shape=(1,), dtype='int32')\n",
    "x1=Embedding(output_dim=128, input_dim=40000, input_length=1)(input_1)\n",
    "x2=Embedding(output_dim=128, input_dim=120, input_length=1)(input_2)\n",
    "x1=Flatten()(x1)\n",
    "x2=Flatten()(x2)\n",
    "x = merge([x1, x2], mode='concat')\n",
    "x = Dropout(0.2)(x)\n",
    "x = Dense(512, activation='relu')(x)\n",
    "x = Dropout(0.2)(x)\n",
    "x = Dense(64, activation='relu')(x)\n",
    "x = Dropout(0.2)(x)\n",
    "out = Dense(1, activation='sigmoid')(x)\n",
    "model = Model(input=[input_1, input_2], output=out)\n",
    "model.compile(optimizer='rmsprop',\n",
    "              loss='mean_squared_error',\n",
    "              metrics=[])\n",
    "# train model\n",
    "model.fit([X_train_uid, X_train_iid], Y_train_score,nb_epoch=10, batch_size=1024*6)\n",
    "\n",
    "# predict\n",
    "Y_test_score = model.predict([X_test_uid, X_test_iid],batch_size=2048)\n",
    "Y_test_score = Y_test_score * 10\n",
    "\n",
    "f=open(\"predict_DL.csv\",\"w\")\n",
    "#f.write(\"score\\n\")\n",
    "for i in range(Y_test_score.shape[0]):\n",
    "    f.write(\"{:1.4f}\".format(Y_test_score[i,0]))\n",
    "    f.write(\"\\n\")\n",
    "f.close()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
